import pandas as pd
import altair as alt
import geopandas as gpd
# Load the two inputs: region geometries (read with geopandas) and the
# population-trend table (read with pandas).
ukraine = gpd.read_file('ukraine.json')
df = pd.read_csv("population_trends.csv")

# Preview the first rows of the geometry table.
ukraine.head()
| | GID_0 | NAME_0 | GID_1 | NAME_1 | VARNAME_1 | NL_NAME_1 | TYPE_1 | ENGTYPE_1 | CC_1 | HASC_1 | geometry |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | UKR | Ukraine | UKR.1_1 | Cherkasy | Cherkas'ka Oblast'\|Cherkasskaya Oblast'\|Cherkassy | None | Oblast' | Region | None | UA.CK | MULTIPOLYGON (((31.32614 48.74507, 31.31716 48... |
| 1 | UKR | Ukraine | UKR.2_1 | Chernihiv | Chernigov\|Tschernigow | None | Oblast' | Region | None | UA.CH | MULTIPOLYGON (((33.09283 50.50966, 33.09261 50... |
| 2 | UKR | Ukraine | UKR.3_1 | Chernivtsi | Chernivets'ka Oblast'\|Chernovitskaya Oblast'\|C... | None | Oblast' | Region | None | UA.CV | MULTIPOLYGON (((24.93280 47.72794, 24.93301 47... |
| 3 | UKR | Ukraine | UKR.4_1 | Crimea | Crimée\|Criméia\|Krim\|Krymskaya Respublika\|Respu... | None | Autonomous Republic | Autonomous Republic | None | UA.KR | MULTIPOLYGON (((33.79291 44.39153, 33.79465 44... |
| 4 | UKR | Ukraine | UKR.5_1 | Dnipropetrovs'k | Dnipropetrovsk\|Dniepropietrovsk\|Dnjepropetrowsk | None | Oblast' | Region | None | UA.DP | MULTIPOLYGON (((33.93176 47.48407, 33.92332 47... |
# Preview the first five rows of the population-trend table.
df.head(n=5)
| | region | year | rate |
|---|---|---|---|
| 0 | Ukraine | 1989 | 1.7 |
| 1 | Ukraine | 1990 | 0.5 |
| 2 | Ukraine | 1991 | -0.8 |
| 3 | Ukraine | 1992 | -2.0 |
| 4 | Ukraine | 1993 | -3.5 |
# Keep only the rows belonging to the most recent year present in the data,
# and rebuild the index so the subset is numbered from 0.
latest_year = df['year'].max()
df_last_year = df.loc[df['year'] == latest_year].reset_index(drop=True)
df_last_year.head()
| | region | year | rate |
|---|---|---|---|
| 0 | Ukraine | 2019 | -6.6 |
| 1 | Crimea | 2019 | NaN |
| 2 | Vinnytsya | 2019 | -7.9 |
| 3 | Volyn | 2019 | -2.8 |
| 4 | Dnipropetrovs'k | 2019 | -8.9 |
# Sanity check before joining: count the region names in the geometry table
# that have no counterpart in the latest-year statistics.
known_regions = df_last_year.region.values
missing_regions = [
    name for name in ukraine.NAME_1.values if name not in known_regions
]
len(missing_regions)
0
# Give the geometry table the same key column name as the statistics table.
ukraine.rename(columns={"NAME_1": "region"}, inplace=True)

# Drop every column that is not one of the names listed below.
columns_to_keep = ('region', 'geometry', 'rate')
extra_columns = [name for name in ukraine.columns if name not in columns_to_keep]
ukraine.drop(columns=extra_columns, inplace=True)

# Attach the latest-year rate to each region's geometry, matching on 'region'.
merged = ukraine.merge(df_last_year, on='region')
merged.head()
| | region | geometry | year | rate |
|---|---|---|---|---|
| 0 | Cherkasy | MULTIPOLYGON (((31.32614 48.74507, 31.31716 48... | 2019 | -10.0 |
| 1 | Chernihiv | MULTIPOLYGON (((33.09283 50.50966, 33.09261 50... | 2019 | -12.5 |
| 2 | Chernivtsi | MULTIPOLYGON (((24.93280 47.72794, 24.93301 47... | 2019 | -3.1 |
| 3 | Crimea | MULTIPOLYGON (((33.79291 44.39153, 33.79465 44... | 2019 | NaN |
| 4 | Dnipropetrovs'k | MULTIPOLYGON (((33.93176 47.48407, 33.92332 47... | 2019 | -8.9 |
# Some regions have no published rate (Crimea shows NaN in the merged table).
# Replace those gaps with 0, which the map's colour condition
# ('datum.rate==0') treats as "draw in light grey". Only the 'rate' column is
# filled, so the geometry and label columns are never overwritten.
merged['rate'] = merged['rate'].fillna(0)

# Three-stop diverging colour scale: lowest rate -> red, 0 -> pale yellow,
# highest rate -> green.
# The min/max are taken from the 'rate' column alone. Reducing the whole frame
# also tries to reduce the non-numeric columns, which pandas deprecated
# (FutureWarning) and which raises TypeError in newer versions.
domain = [merged['rate'].min(), 0, merged['rate'].max()]
range_ = ['#d73027', '#ffffbf', '#1a9850']
C:\Users\Xiaomi\AppData\Local\Temp/ipykernel_128/3637414924.py:1: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction. domain = [merged.min()['rate'], 0, merged.max()['rate']]
# Horizontal legend positioned by explicit coordinates instead of a preset
# orientation.
rate_legend = alt.Legend(
    orient='none',
    legendX=0,
    legendY=240,
    direction='horizontal',
    titleAnchor='start',
)

# Colour encoding for the rate, using the domain and palette defined earlier.
rate_scale = alt.Scale(domain=domain, range=range_)
rate_color = alt.Color('rate:Q', scale=rate_scale, legend=rate_legend)

# Choropleth of the regions: shapes whose rate equals 0 are drawn light grey,
# all others are coloured by the rate scale.
map_plot = (
    alt.Chart(merged)
    .mark_geoshape(stroke='grey')
    .encode(
        tooltip=['region:N', 'rate:N'],
        color=alt.condition(
            'datum.rate==0',
            alt.ColorValue('lightgrey'),
            rate_color,
        ),
    )
)
# Reshape the table to long form: one row per (year, region) with the melted
# 'rate' column, which melt names 'value' and we rename back to 'rate'.
rate_by_year = df.melt(id_vars=['year', 'region'], value_vars=['rate'])
rate_by_year.rename(columns={"value": "rate"}, inplace=True)

# Leave out the rows for the country as a whole; keep the individual regions.
is_region_row = rate_by_year['region'] != "Ukraine"
rate_by_year = rate_by_year[is_region_row]
rate_by_year.head()
| | year | region | variable | rate |
|---|---|---|---|---|
| 31 | 1989 | Crimea | rate | 3.9 |
| 32 | 1990 | Crimea | rate | 2.5 |
| 33 | 1991 | Crimea | rate | 0.9 |
| 34 | 1992 | Crimea | rate | -0.7 |
| 35 | 1993 | Crimea | rate | -2.7 |
# X axis for the year; the 'f' number format is presumably there so years are
# shown as plain digits rather than with a thousands separator.
year_axis = alt.X("year", axis=alt.Axis(format='f'))

# One line per region showing its rate over the years.
lines_plot = (
    alt.Chart(rate_by_year)
    .mark_line()
    .encode(
        x=year_axis,
        y="rate",
        color="region",
        tooltip=['region', 'rate:N', 'year:N'],
    )
)

# Place the map and the line chart side by side, with the view border removed.
combined_view = alt.hconcat(map_plot, lines_plot)
combined_view.configure_view(strokeWidth=0)